import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import holoviews as hv
import bokeh
from holoviews import opts
hv.extension('bokeh', 'matplotlib')

# Read the region coordinates, the list of regions, and the observed and
# predicted trip data.
region_coords = pd.read_csv('region_coordinates.csv', sep=';')
region_coords = region_coords.set_index('region')

# The region ids are taken from the column labels of regions.csv.
regions = list(map(int, pd.read_csv('regions.csv').columns.values))

data = pd.read_csv('june_pred.csv', parse_dates=['tpep_pickup_datetime'])
# Truncate each pickup timestamp to midnight so rows can be grouped by day.
data['pickup_day'] = data['tpep_pickup_datetime'].dt.floor('D')

# Sum observed and predicted counts into one row per (region, day).
data_grouped = (
    data.groupby(['region', 'pickup_day'])[['trip_count', 'predicted']]
    .sum()
    .reset_index(level=['region', 'pickup_day'])
)
data_grouped.head(2)
| | region | pickup_day | trip_count | predicted |
|---|---|---|---|---|
| 0 | 1075 | 2016-05-31 | 26.0 | 38.877280 |
| 1 | 1075 | 2016-06-01 | 1533.0 | 1602.541519 |
# Prepare the data for visualisation.
#
# `days[d - 1]` is a list with one record per region for day-of-month `d`
# (1..30).  Each record carries the region id, its observed ('trips') and
# predicted ('pred') totals for that day, and the four corners of the
# region rectangle as parallel 'lats' / 'lons' lists.

# A region's rectangle does not depend on the day, so resolve it once per
# region instead of once per (day, region) pair.
region_boxes = {}
for reg in regions:
    bounds = region_coords[region_coords.index == reg]
    south, north = list(bounds[['south', 'north']].values[0])
    west, east = list(bounds[['west', 'east']].values[0])
    region_boxes[reg] = (south, north, west, east)

# Index the daily totals by (region, day of month) in a single pass so the
# loop below does not re-filter the whole frame twice for every cell.
# setdefault keeps the first matching row, as the former `.values[0]`
# lookup did.
daily_totals = {}
for reg, day, trips, pred in zip(
    data_grouped['region'].values,
    data_grouped['pickup_day'].dt.day.values,
    data_grouped['trip_count'].values,
    data_grouped['predicted'].values,
):
    daily_totals.setdefault((reg, day), (trips, pred))

days = []
for day in range(1, 31):
    cells = []
    for reg in regions:
        south, north, west, east = region_boxes[reg]
        # Raises KeyError when a region has no row for this day.
        trips, pred = daily_totals[(reg, day)]
        cells.append({
            'region': reg,
            'trips': trips,
            'pred': pred,
            # Corner order: SW, SE, NE, NW.
            'lats': [south, south, north, north],
            'lons': [west, east, east, west],
        })
    days.append(cells)
def _polygons_by_day(value_dim):
    """Map each day of month (1..30) to an hv.Polygons built from that day's
    records, with `value_dim` as its only value dimension."""
    return {
        day: hv.Polygons(days[day - 1], ['lons', 'lats'], [value_dim])
        for day in range(1, 31)
    }


# One Polygons element per day for the observed and the predicted counts.
trips_dic = _polygons_by_day(('trips', 'Trip count'))
pred_dic = _polygons_by_day(('pred', 'Prediction'))

# Wrap both dictionaries in HoloMaps keyed by 'Day' and place them side by
# side: observed on the left, predicted on the right.
holomap_preds = hv.HoloMap(pred_dic, kdims='Day')
holomap_trips = hv.HoloMap(trips_dic, kdims='Day')
layout = holomap_trips + holomap_preds
layout
# Curves of observed and predicted trip counts over time, built from the
# un-aggregated `data` frame with 'region' as the remaining key dimension.
vdims = [('trip_count', 'Observed'), ('predicted', 'Predicted')]
kdims = [('tpep_pickup_datetime', 'Time'), ('region', 'Area (Region)')]
ds = hv.Dataset(data, kdims, vdims)

observed_curves = ds.to(hv.Curve, 'tpep_pickup_datetime', 'trip_count')
predicted_curves = ds.to(hv.Curve, 'tpep_pickup_datetime', 'predicted')

# Stack the two plots in a single column.
layout = (observed_curves + predicted_curves).cols(1)
layout.opts(opts.Curve(width=600, height=250, framewise=True))